**************************************/
/******************************************************************************/
/* Analytical Dataset Construction for Large-Firm Large-Bank Study            */
/* Created July 1, 2019                                                       */
/* Authors: Jon Pogach, Andrew Vizzi, Stefan Jacewitz                         */
/* Input Data:  QWI data (S:\JPogach\Walmart\US_county_firmsize.csv),         */
/*                   County-level by firm size                                */
/*              SOD data (Y: )                                                */
/*              demographic_data.csv, demographic data from BLS               */
/*              (assembled by Andrew)                                         */
/* Intermediate Data:                                                         */
/*              Pre-collapsed county-level-by-firm-size data with national    */
/*              trends by industry CountyIndustry.dta (county-year data       */
/*              summarizing industrial trends) CountyBanking.dta (county-year */
/*              data summarizing banking trends)                              */
/*              Dates and saves all intermediate data to Archive file.        */
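/* Analytical Data:                                                           */
/*              BartikAnalyticData_2digit.dta (banking, industry, and         */
/*              demographic data merged by county and year)                   */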
/******************************************************************************/
/******************************************************************************/


quietly{

/*****************/
/* Preliminaries */
/*****************/

* Begin the elapsed-time clock that the later "etime" calls report against
etime, start

* Reset stored estimates and the Results window, then restart timer 1
eststo clear
cls
timer clear 1
timer on 1

* Switch controlling map creation ("1" = create maps)
global Maps "1"

* Root folder holding all local data; it also becomes the working directory.
* EDIT THIS PATH TO MATCH THE COMPUTER THE SCRIPT IS RUN ON
global LocalData "C:\LOCAL DATA\Temp Data\Small Bank Transition"
cd "$LocalData"

* Make sure the output folders exist. -capture- swallows the error that
* -mkdir- raises when the folder is already there.
capture mkdir "$LocalData\Results\"
capture mkdir "$LocalData\Archive\"

* Today's date formatted with %tdDNCY (day, month number, four-digit year);
* appended to the names of archived files
local CurrentDate : display %tdDNCY daily("$S_DATE", "DMY")


/******************************************************************************/
/******************************************************************************/
/* Loading Data                                                               */
/******************************************************************************/
/******************************************************************************/


/***********************************************************/
/* Obtain county employment by industry-firm size          */
/* Saves CountyYearFirmSizes2digit.dta                     */
/***********************************************************/
local Data "CountyYearFirmSizes2digit"

* FIX: the progress messages below now describe this section and the file and
* reshape key actually used (they were copied from other sections).
noi disp "Obtaining county employment by industry-firm size data..."
noi disp "Verifying that `Data' does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file "`Data'.dta"

* Build the dataset only when the file was not found.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Loading US_county_firmsize.csv..."
	noi etime
	import delimited "$LocalData\US_county_firmsize.csv", clear
	noi disp "Finished loading."
	noi etime
	drop race ethnicity education firmage ownercode sex agegrp quarter semp

	*Use total county by firm size less financial by firm size
	*Industry=0 indicates across all industries. geo_level!="S" kicks out state aggregates
	*geography is county
	keep if industry!="0" & geo_level!="S"

	* Verify that there are no duplicates
	isid geography year firmsize industry

	/*How well populated are the different size fields for county-years (universe is 0)
		firmsize |      Freq.     Percent        Cum.
	------------+-----------------------------------
				0 |     55,351       16.85       16.85
				1 |     55,334       16.85       33.70
				2 |     54,925       16.72       50.43
				3 |     55,002       16.75       67.17
				4 |     52,833       16.09       83.26
				5 |     54,970       16.74      100.00
	------------+-----------------------------------
			Total |    328,415      100.00
	*/

	gen emp_size = emp
	* FIX: label the variable that is kept (emp_size). The original labelled emp,
	* which is discarded by the -keep- that follows.
	label var emp_size "Total Employment"

	keep geography industry year firmsize emp_size

	/* Combining Lowest Three Employment Categories into one */
	gen Firm123 = emp_size if firmsize==1 | firmsize==2 | firmsize==3
	* Total over firm sizes 1, 2, and 3 within county-industry-year
	* (egen sum() treats missing values as zero)
	bysort year geography industry: egen TotalFirm123 = sum(Firm123)
	drop Firm123
	* The firmsize==1 row now carries the combined total of sizes 1, 2, and 3
	replace emp_size = TotalFirm123 if firmsize==1
	drop TotalFirm123
	* Keep only two sizes: Firms 1,2,3 and Firms 5. Keep total employment for shares.
	keep if firmsize==1 | firmsize==5 | firmsize==0

	* Drop if emp_size is missing
	* Note: These missings are not coded as zeros
	drop if missing(emp_size)

	* Keep only if the first year observed for each county is 2000.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	* Keep only if the last year observed for each county is 2017.
	bysort geography: egen maxyear = max(year)
	keep if maxyear==2017
	drop maxyear
	* Note that there is a large firm and small firm observation for each county-year.

	noi disp "Reshaping data where i(geography industry year) and j(firmsize)."
	* FIX: name the stub in full. The variable is emp_size; the original stub
	* "emp" only resolves if reshape accepts it as an abbreviation.
	* Result: emp_size0 (all sizes), emp_size1 (sizes 1-3), emp_size5 (size 5).
	reshape wide emp_size, i(geography industry year) j(firmsize)

	*Require Observations of Large and Small Firm sizes for each county-year
	noi drop if missing(emp_size5) | missing(emp_size1)
	noi disp "11,401 observations deleted?"

	*County shares by firmsize
	egen tot_emp = rowtotal(emp_size1 emp_size5)
	egen geoind = group(geography industry)

	* County-year totals across industries, by size class
	bysort geography year: egen county_emp0 = sum(emp_size0)
	bysort geography year: egen county_emp1 = sum(emp_size1)
	bysort geography year: egen county_emp5 = sum(emp_size5)

	* County-year totals excluding the row's own industry ("not i")
	gen county_noti_emp0 = county_emp0 - emp_size0
	gen county_noti_emp1 = county_emp1 - emp_size1
	gen county_noti_emp5 = county_emp5 - emp_size5

	* Panel unit is the county-industry pair; required for the L#. lag operators
	xtset geoind year

	*Specify period over which shocks occur
	foreach span in 1 3 5 15 {
		* Own county-industry log growth over the span, by size class
		gen gLargeFirm`span' 		= log(emp_size5+1) - log(L`span'.emp_size5+1) if emp_size5!=0 & L`span'.emp_size5!=.
		gen gSmallFirm`span' 		= log(emp_size1+1) - log(L`span'.emp_size1+1) if emp_size1!=0 & L`span'.emp_size1!=.
		* Log growth of the rest of the county (own industry removed), scaled by
		* the rest-of-county share of total county employment
		gen gLargeFirmnoti`span' 	= (log(county_noti_emp5+1) - log(L`span'.county_noti_emp5+1))*county_noti_emp5/county_emp0 if emp_size5!=0 & L`span'.emp_size5!=.
		gen gSmallFirmnoti`span' 	= (log(county_noti_emp1+1) - log(L`span'.county_noti_emp1+1))*county_noti_emp1/county_emp0 if emp_size1!=0 & L`span'.emp_size1!=.
		* FIX: the original used log(x) - log(L.x+1), adding 1 only to the lagged
		* term. Both terms now add 1, matching the four lines above. The explicit
		* condition keeps the result missing when current employment is zero, as
		* log(0) did before.
		gen gLargeFirmCounty`span'	= log(county_emp5+1) - log(L`span'.county_emp5+1) if county_emp5!=0
		gen gSmallFirmCounty`span'	= log(county_emp1+1) - log(L`span'.county_emp1+1) if county_emp1!=0
	}

	noi disp "Saving `Data'.dta"
	compress
	* Working copy plus a date-stamped copy in the Archive folder
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "


/***********************************************************/
/* Obtain national employment growth by industry-firm size */
/* Saves NationalIndustryFirmGrowth2digit.dta              */
/***********************************************************/
local Data "NationalIndustryFirmGrowth2digit"

noi disp "Obtaining national employment growth by firm-size size data..."
noi disp "Verifying that `Data' does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file "`Data'.dta"

* Build the dataset only when the file was not found.
if _rc == 601 {

	noi disp "Data not found.  Building data..."
	* FIX: message now names the file that is actually imported.
	noi disp "Loading US_county_firmsize.csv..."
	*Obtain National Employment Growth by Industry-Firmsize
	import delimited "$LocalData\US_county_firmsize.csv", clear
	noi disp "Finished loading."

	* THIS WAS LOWER. WHICH IS CORRECT?
	*Excluding Finance Industry and NAICS92, the latter because there is no data for large v. small
	* NOTE(review): unlike the county build earlier in this file, the all-industry
	* rows (industry=="0") and state aggregates (geo_level=="S") are not removed
	* before the national collapse - confirm this is intended.
	drop if industry=="52" | industry=="92"

	* Verify that there are no duplicates
	isid geography industry year firmsize

	* Keep only if the first year observed for each county is 2000.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	* Keep only if the last year observed for each county is 2017.
	bysort geography: egen maxyear = max(year)
	keep if maxyear==2017
	drop maxyear

	* Collapse to national totals by industry, year, and firm size
	sort  industry year firmsize
	collapse (sum) emp, by(industry year firmsize)
	label var emp "Total employment for the industry, year, and firm size"

	* FIX: firmsize==0 is the all-sizes aggregate, so summing every row counted
	* total employment roughly twice and halved every size share. The total is
	* now taken over the individual size classes only.
	gen double emp_sized = emp if firmsize!=0
	bysort industry year: egen totemp = sum(emp_sized)
	drop emp_sized

	/* Combining Lowest Three Employment Categories into one */
	gen Firm123 = emp if firmsize==1 | firmsize==2 | firmsize==3
	* Finds total of all firms of size 1, 2, and 3
	bysort industry year: egen TotalFirm123 = sum(Firm123)
	drop Firm123
	* Replace observation from firm size 1 with total of firms 1, 2, and 3
	replace emp = TotalFirm123 if firmsize==1
	drop TotalFirm123
	* Keep only two sizes: Firms 1,2,3 and Firms 5
	keep if firmsize==1 | firmsize==5


	gen sizeshare = emp/totemp
	label var sizeshare "Share for a size, industry, and year"

	sort industry firmsize year

	* The [_n-span] references below step back "span" rows within an
	* industry-size series, which equals "span" years only if each series has
	* one row per consecutive year.
	foreach span in 1 3 5 15 {
		* Log change in employment over the span
		by industry firmsize : gen g_nat_ind_size`span'_ = ln(emp) - ln(emp[_n-`span'])
		* FIX: label by full variable name (trailing underscore) instead of
		* relying on variable-name abbreviation.
		label var g_nat_ind_size`span'_ "Growth in national industry employment, by size"

		* Change in the size class's share of industry employment over the span
		by industry firmsize : gen d_nat_ind_share`span'_ = sizeshare - sizeshare[_n-`span']
		label var d_nat_ind_share`span'_ "Change in national industry employment share, by size"
	}


	*First Observation by Industry
	sort industry firmsize year
	by industry firmsize : gen sizeshare1_2000 = sizeshare[1]
	gen dn_i_sizeshare1_t_00 = sizeshare - sizeshare1_2000

	by industry : gen emp_2000 = totemp[1]
	gen n_i_gr_t_00 = (totemp - emp_2000)/emp_2000

	*Plots of Size-Share by Industry

	twoway (line dn_i_sizeshare1_t_00 year if firmsize==1, yaxis(1))(line n_i_gr_t_00 year if firmsize==1, yaxis(2)), ytitle("Change in Size Share (Left), Emp Growth (Right)") legend(lab(1 "Chg Small Firm Share") lab(2 "Pct Chng Emp")) by(industry)
	graph export "$LocalData\Results\US_industry_SmallShare.png", replace
	graph export "$LocalData\Results\US_industry_SmallShare.pdf", replace


	* Keep only what is needed, and reshape to match diagnostic code format.
	keep industry firmsize year g_nat_ind_size* d_nat_ind_share*

	* FIX: restore the reshape to wide. Later in this file the dataset is merged
	* with -merge m:1 industry year- and read through columns such as
	* g_nat_ind_size1_1 and g_nat_ind_size1_5. Both need one row per
	* industry-year with the firm size appended to each variable name.
	* -unab- expands the wildcards into the explicit stub list for reshape.
	unab wide_stubs : g_nat_ind_size* d_nat_ind_share*
	reshape wide `wide_stubs', i(industry year) j(firmsize)
	sort industry year


	noi disp "Saving `Data'.dta"
	compress
	* Working copy plus a date-stamped copy in the Archive folder
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "
	

/***************************************/
/* Adding and Merging Demographic Data */
/* Saves Demographic.dta               */
/***************************************/
local Data "Demographic"

noi disp "Obtaining `Data' data..."
noi disp "Verifying that `Data' does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- keeps
* that code in _rc rather than halting.
capture confirm file "`Data'.dta"

* Any return code other than 601 is treated as "file already present".
if _rc != 601 {
	noi disp "`Data' already exists."
}
else {
	noi disp "Data not found.  Building data..."
	noi disp "Loading demographic_data.csv..."
	import delimited "S:\JPogach\Walmart\Preliminary Analysis\demographic_data.csv", clear
	noi disp "Building Demographic.dta..."

	* Stop with an error unless geography-year uniquely identifies the rows;
	* the sort option leaves the data ordered by geography and year.
	isid geography year, sort

	noi disp "Saving `Data'.dta"
	compress
	* Working copy plus a date-stamped copy in the Archive folder
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
noi etime
noi disp " "
	



/***********************************************************/
/* Getting CPI data and setting 2001 as base year */
/***********************************************************/
local Data "CPI"

noi disp "Obtaining CPI data..."
noi disp "Verifying that `Data'.dta does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file `Data'.dta

* Build the dataset only when the file was not found.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Loading CPI CPALTT01USA661S.csv..."

	import delimited "C:\LOCAL DATA\Temp Data\Walmart\CPI CPALTT01USA661S.csv", clear
	keep year cpaltt01usa661s
	keep if year>1997

	* FIX: pick the 2001 base by value. The original took the 4th row of the
	* still-unsorted data, which is 2001 only if the file is in year order and
	* its first retained year is 1998.
	summarize cpaltt01usa661s if year==2001, meanonly
	if r(N) == 0 {
		noi disp as error "No CPI observation found for base year 2001."
		error 2000
	}
	scalar cpi_base_2001 = r(mean)

	* Rescale so that the index equals 1 in 2001
	replace cpaltt01usa661s = cpaltt01usa661s/scalar(cpi_base_2001)
	scalar drop cpi_base_2001

	sort year
	compress
	* Working copy plus a date-stamped copy in the Archive folder
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "


/***********************************************************/
/* Getting SOD data */
/***********************************************************/
local Data "CountyBanking"

* FIX: the closing quote was missing from this message.
noi disp "Obtaining SOD county data"
noi disp "Verifying that `Data'.dta does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file `Data'.dta

* Build the dataset only when the file was not found.
if _rc == 601 {
	noi disp "Data not found.  Building data..."

	*******************************************************************************
	****Generating SOD county data for merge***************************************
	*******************************************************************************

	*Pulling in SOD data
	noi disp "Loading SOD data..."
	import delimited "C:\LOCAL DATA\Temp Data\Walmart\SOD_Data.csv", clear


	/*
	preserve
		destring asset depsumbr, ignore(",") replace
		sort cert stcntybr year
		display "1"
		collapse (sum) depsumbr (max) asset, by(cert stcntybr year)
		compress
		display "1"
		save "$LocalData\SOD_Data_collapsed.dta", replace
	run
*/
	sort year

	noi disp "Merging in CPI data..."
	merge m:1 year using "$LocalData\CPI.dta"

	* Strip thousands separators, then deflate assets by the rescaled CPI
	destring asset depsumbr, ignore(",") replace
	replace asset = asset/cpaltt01usa661s

	* Drop if there is no asset data. This also removes years with no CPI match
	* and CPI-only years, because asset is missing in both cases.
	drop if missing(asset)

	* Banks are "big" if they have more than $50 billion in assets
	* (threshold of 50,000,000 - asset is presumably in $ thousands; confirm)
	gen byte big = asset>50000000
	label var big "Assets > $50 billion"

	* Banks are "small" if they have less than $1 billion in assets
	gen small = asset<1000000
	label var small "Assets < $1 billion"

	* Generating Bank-year and Bank-County-Year deposits and branches
	bysort cert year: egen bank_dep = sum(depsumbr)
	label var bank_dep "Total Bank Deposits"
	bysort cert stcntybr year: egen bank_cnty_dep = sum(depsumbr)
	label var bank_cnty_dep "Total Bank County Deposits"
	* Branch count = number of rows with non-missing deposits
	bysort cert stcntybr year: egen bank_br = count(depsumbr)
	label var bank_br "Total Bank County Branches"

	* Keep only first observation by cert, stcntybr, and year
	duplicates drop cert stcntybr year, force


	bysort stcntybr year: egen cnty_dep = sum(bank_cnty_dep)
	label var cnty_dep "Total County Deposits"
	bysort stcntybr year: egen cnty_br = sum(bank_br)
	label var cnty_br "Total County Branches"
	* After the de-duplication above each bank-county-year has one row, so obs
	* is 1 everywhere and its county-year sum is the number of banks.
	bysort cert stcntybr year: gen obs = _n
	bysort stcntybr year: egen cnty_bank = sum(obs)
	label var cnty_bank "Total County Banks"

	* Share of each bank's deposits held in the county; total sums those
	* shares over the bank's counties
	gen bank_county_wght = bank_cnty_dep/bank_dep
	sort cert year
	by cert year: egen total = sum(bank_county_wght)

	drop _merge

	save "$LocalData\Bank_Dep_Wghts.dta", replace
	* FIX: archive the weights file under its own name. The original wrote it
	* as the archived CountyBanking file, which the final save in this block
	* then overwrote, so no archived copy of the weights survived.
	save "$LocalData\Archive\Bank_Dep_Wghts`CurrentDate'.dta", replace

	*Create Large deposit share by county
	gen temp = bank_cnty_dep if big==1
	bysort stcntybr year: egen big_dep = sum(temp)
	drop temp
	label var big_dep "Deposits held by large banks"
	gen large_dep_share = big_dep/cnty_dep
	label var large_dep_share "Share of deposits held by large banks"

	*Create Large branch share by county
	gen temp = bank_br if big==1
	bysort stcntybr year: egen big_br = sum(temp)
	gen large_br_share = big_br/cnty_br
	drop temp

	*Create Large bank by county
	gen temp = 1 if big==1
	bysort stcntybr year: egen large_bank = sum(temp)
	gen large_bank_share = large_bank/cnty_bank
	drop temp

	*Create Small deposit share by county
	gen temp = bank_cnty_dep if small==1
	bysort stcntybr year: egen small_dep = sum(temp)
	gen small_dep_share = small_dep/cnty_dep
	drop temp

	*Create Small branch share by county
	gen temp = bank_br if small==1
	bysort stcntybr year: egen small_br = sum(temp)
	gen small_br_share = small_br/cnty_br
	drop temp

	*Create Small bank by county
	gen temp = 1 if small==1
	bysort stcntybr year: egen small_bank = sum(temp)
	gen small_bank_share = small_bank/cnty_bank
	drop temp

	*Create HHI measure by county (sum of squared within-county shares)
	gen bank_cnty_dep_share2 = (bank_cnty_dep/cnty_dep)^2
	gen bank_br_share2 = (bank_br/cnty_br)^2
	bysort stcntybr year: egen hhi_dep = sum(bank_cnty_dep_share2)
	label var hhi_dep "Deposit HHI"
	bysort stcntybr year: egen hhi_br = sum(bank_br_share2)
	* FIX: this label was applied to hhi_dep, overwriting "Deposit HHI" and
	* leaving hhi_br unlabelled.
	label var hhi_br "Branch HHI"

	* Reduce to one row per county-year and rename the county id to match the
	* other datasets
	drop obs
	bysort stcntybr year: gen obs = _n
	keep if obs==1
	rename stcntybr geography
	keep year geography cnty_dep cnty_br hhi_dep hhi_br large_dep_share big_dep big_br large_br_share small_dep small_dep_share small_br small_br_share small_bank_share large_bank large_bank_share

	sort geography year


	noi disp "Saving `Data'.dta..."
	compress
	* Working copy plus a date-stamped copy in the Archive folder
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace

}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "
	

/***********************************************************/
/* Build county-industry shares and Bartik instruments     */
/* Saves CountyIndustryEmp_2digit.dta                      */
/***********************************************************/
local Data "CountyIndustryEmp_2digit"

noi disp "Obtaining county industrial data..."
noi disp "Verifying that `Data'.dta does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file `Data'.dta

* Build the dataset only when the file was not found.
if _rc == 601 {
noi disp "Data not found.  Building data..."
noi disp "Loading US_county_firmsize.csv..."
import delimited "$LocalData\US_county_firmsize.csv", clear
noi disp "Finished loading data"
noi disp "$LocalData"

*We only need total employees by industry, not by-size
	*firmsize = 0 is aggregate employment by county-industry
	keep if firmsize==0
	drop firmsize

*Keeping Relevant (not 52, 92, aggregate, state) County-Industries for Analysis
keep if industry!="52" & industry!="92" & geo_level!="S" & industry!="0"
	/*If wanting to restrict to only those counties with material (i.e. non-censored) employment shares for every industry
	gen temp = cond(missing(emp),1,0)
	bysort geography year: egen empty = max(temp)
	keep if empty==0
	drop empty temp
	*/


noi disp "Keeping only county-industy pairs that have observations in both 2000."

*Keeping only data that exist in 2000
keep if year==2000

*Note that in removing industry 52 (and 92) we cannot rely on county aggregates for any employment numbers across industries
*Summing up county-industry employment
*Note: we are setting to "0" the empties by necessity of construction of Bartik. For smaller counties, this is likely a bigger issue for computing industry shares.
*If we restrict to only counties with reported values for each industry-county, we are left with ~300 counties or so. (I think results are robust to this). However, we could not extend this approach to 3-digit NAICS codes.
* Generate county-industry employment (c_i_emp)
noi gen c_i_emp = emp
noi replace c_i_emp = 0 if missing(c_i_emp)
label var c_i_emp "County-Industry Employment"

* Generate total county employment (c_emp)
bysort geography: egen c_emp = sum(c_i_emp)
* FIX: this label was applied to c_i_emp, overwriting "County-Industry
* Employment" and leaving c_emp unlabelled.
label var c_emp "Total County Employment"
* Drop if there is no employment in the county
noi drop if c_emp <= 0

/*
** IS THIS NEEDED?
* Generate total national employment (n_emp)
egen n_emp = sum(c_i_emp)
label var c_i_emp "Total National Employment"
*/

* Generating County-Industy Shares in 2000
gen c_i_share_2000 = c_i_emp/c_emp

* Check to insure every county's shares sum to 1.
bysort geography: egen total = sum(c_i_share_2000)
* Due to rounding, some are negligibly less than 1 (0.9999999)
assert total > 0.99
drop total /* Not needed once check is complete */

keep geography industry c_i_share_2000 c_emp

*Merging In County Size Shares
* Join by geography and industry
sort geography industry
* Pairs each 2000 share row with every row of the same county-industry in the
* using file; -joinby- by default drops rows that have no match on either side
joinby geography industry using "$LocalData\CountyYearFirmSizes2digit.dta"

*Merging In National Industry-Firmsize Growth Rates
* The m:1 merge requires the using file to hold one row per industry-year.
* The growth columns read below carry the firm size as a suffix
* (for example g_nat_ind_size3_1 and g_nat_ind_size3_5).
* NOTE(review): rows found only in the using file are kept by this merge
* (geography missing) - confirm they are meant to stay in the saved file.
sort industry year
merge m:1 industry year using "$LocalData\NationalIndustryFirmGrowth2digit.dta"
drop _merge


****************************************************************************************
*Construction of Bartik Instrument******************************************************
****************************************************************************************

* Bartik Instrument: national industry growth for small and large firms, weighted by county-industry shares
noi disp "Using asgen to generate industry growth, by size, weighted by county-industry shares."

	* Industry contribution: 2000 county-industry share times national growth
	* (sm = firm size 1, lg = firm size 5), for 1-, 3-, and 5-year spans
	gen sm_share_trend1 = c_i_share_2000*g_nat_ind_size1_1
	gen sm_share_trend3 = c_i_share_2000*g_nat_ind_size3_1
	gen sm_share_trend5 = c_i_share_2000*g_nat_ind_size5_1

	gen lg_share_trend1 = c_i_share_2000*g_nat_ind_size1_5
	gen lg_share_trend3 = c_i_share_2000*g_nat_ind_size3_5
	gen lg_share_trend5 = c_i_share_2000*g_nat_ind_size5_5

	* County-year instrument: sum of the contributions over industries
	bysort geography year: egen bartikGS1 = sum(sm_share_trend1)
	bysort geography year: egen bartikGS3 = sum(sm_share_trend3)
	bysort geography year: egen bartikGS5 = sum(sm_share_trend5)
	bysort geography year: egen bartikGL1 = sum(lg_share_trend1)
	bysort geography year: egen bartikGL3 = sum(lg_share_trend3)
	bysort geography year: egen bartikGL5 = sum(lg_share_trend5)

	* Leave-one-out version: remove the row's own industry and rescale by the
	* remaining share (missing when the industry's share is exactly 1)
	gen bartik_noti_GS1 = (bartikGS1 - sm_share_trend1)*(1/(1-c_i_share_2000))
	gen bartik_noti_GS3 = (bartikGS3 - sm_share_trend3)*(1/(1-c_i_share_2000))
	gen bartik_noti_GS5 = (bartikGS5 - sm_share_trend5)*(1/(1-c_i_share_2000))
	gen bartik_noti_GL1 = (bartikGL1 - lg_share_trend1)*(1/(1-c_i_share_2000))
	gen bartik_noti_GL3 = (bartikGL3 - lg_share_trend3)*(1/(1-c_i_share_2000))
	gen bartik_noti_GL5 = (bartikGL5 - lg_share_trend5)*(1/(1-c_i_share_2000))



noi disp "Obtaining industry match data..."
noi disp "Verifying that this data does not already exist..."
* This returns an error if the file does not exist.
capture confirm file IndustryMatch.dta

* If an error is returned, then the commands below are run.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building IndustryMatch.dta..."
	* preserve/restore: the lookup table is built from a copy so the main
	* dataset in memory is left untouched
	preserve

	*Defining Industry Names
	* One row per industry code, with a numeric id (ind)
	egen ind = group(industry)
	keep ind industry
	bysort ind: gen obs= _n
	keep if obs==1
	drop obs

	gen IndLabel = "Mining, Quarrying, Gas Extraction" if industry=="21"
	replace IndLabel = "Agriculture, Forestry, Fishing, Hunting" if industry=="11"
	replace IndLabel = "Utilities" if industry=="22"
	replace IndLabel = "Construction" if industry=="23"
	replace IndLabel = "Manufacturing" if industry=="31-33"
	replace IndLabel = "Wholesale Trade" if industry=="42"
	replace IndLabel = "Retail Trade" if industry=="44-45"
	replace IndLabel = "Transportation and Warehousing" if industry=="48-49"
	replace IndLabel = "Information" if industry=="51"
	replace IndLabel = "Real Estate, Rental, Leasing" if industry=="53"
	replace IndLabel = "Prof., Scientific, Tech Services" if industry=="54"
	replace IndLabel = "Mgmt of Companies and Enterprises" if industry=="55"
	replace IndLabel = "Admin and Support and Waste Mgmt" if industry=="56"
	replace IndLabel = "Education" if industry=="61"
	replace IndLabel = "Health Care, Social Assistance" if industry=="62"
	replace IndLabel = "Arts, Entertainment, Recreation" if industry=="71"
	replace IndLabel = "Accommodation and Food Serv" if industry=="72"
	replace IndLabel = "Other (non Public Admin)" if industry=="81"

	compress
	save "$LocalData\IndustryMatch.dta", replace
	save "$LocalData\Archive\IndustryMatch`CurrentDate'.dta", replace
	restore
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "


noi disp "Obtaining initial shares data..."
noi disp "Verifying that this data does not already exist..."
* This returns an error if the file does not exist.
* NOTE(review): the code that acted on this check is commented out below, so
* the return code of this -capture- is currently unused.
capture confirm file InitialShares2000_2digit.dta
/*
* If an error is returned, then the commands below are run.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building InitialShares2000_2digit.dta..."

	*Setting up Wide data to merge in Initial Shares and National Industry Firm-Size Growth Rates as columns into county-year data structure
	preserve 
	keep year geography c_i_share_2000 industry g_nat_ind_size* g_nat_ind_size* d_nat_ind_share1_1 d_nat_ind_share1_5
	drop if industry==" " | industry=="52"
	replace c_i_share_2000 = 0 if missing(c_i_share_2000)
	rename g_nat_ind_size1_1 g_nat_ind_size1_1_
	rename g_nat_ind_size3_1 g_nat_ind_size3_1_
	rename g_nat_ind_size5_1 g_nat_ind_size5_1_
	rename g_nat_ind_size15_1 g_nat_ind_size15_1_

	rename g_nat_ind_size1_5 g_nat_ind_size1_5_
	rename g_nat_ind_size3_5 g_nat_ind_size3_5_
	rename g_nat_ind_size5_5 g_nat_ind_size5_5_
	rename g_nat_ind_size15_5 g_nat_ind_size15_5_
	
	rename d_nat_ind_share1_1 d_nat_ind_share1_
	rename d_nat_ind_share1_5 d_nat_ind_share5_
	*Renaming NAICS 2-digit codes to allow for destringing (necessary for reshape function)
	replace industry="31" if industry=="31-33"
	replace industry="44" if industry=="44-45"
	replace industry="48" if industry=="48-49"
	destring industry, replace
	keep if !missing(industry)
*	reshape wide c_i_share_2000 g_nat_ind_size1_ g_nat_ind_size5_ d_nat_ind_share1_ d_nat_ind_share5_, i(geography year) j(industry)
	reshape wide c_i_share_2000 g_nat_ind_size* d_nat_ind_share*, i(geography year) j(industry)
	
	local ind_stub c_i_share_2000
	foreach cshare of varlist `ind_stub'* {
		bysort geography: egen temp = max(`cshare')
		replace `cshare' = temp if missing(`cshare')
		drop temp
		replace `cshare' = 0 if missing(`cshare')
	}
	
*	local growth g_nat_ind_size1_ 
	local growth g_nat_ind_size* 
	foreach g of varlist `growth'* {
		bysort year: egen temp = max(`g')
		replace `g' = temp if missing(`g')
		drop temp
		replace `g' = 0 if missing(`g')
	}
/*	
	local growth g_nat_ind_size5_ 
	foreach g of varlist `growth'* {
		bysort year: egen temp = max(`g')
		replace `g' = temp if missing(`g')
		drop temp
		replace `g' = 0 if missing(`g')
	}
*/	
*	local growth d_nat_ind_share1_ 
	local growth d_nat_ind_share1_1 d_nat_ind_share1_5 
	foreach g of varlist `growth'* {
		bysort year: egen temp = max(`g')
		replace `g' = temp if missing(`g')
		drop temp
		replace `g' = 0 if missing(`g')
	}

	/*
	local growth d_nat_ind_share5_ 
	foreach g of varlist `growth'* {
		bysort year: egen temp = max(`g')
		replace `g' = temp if missing(`g')
		drop temp
		replace `g' = 0 if missing(`g')
	}
*/
	compress
	save "$LocalData\InitialShares2000_2digit.dta", replace
	save "$LocalData\Archive\InitialShares2000_2digit_`CurrentDate'.dta", replace

	restore
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "

		
/******************************************************************************/
/* Generating County Industrial Dataset                                       */
/******************************************************************************/


	bysort geography year: gen obs2 = _n 
	keep if obs2==1
	keep geography year c_emp bartikGS* bartikGL* bartikS* bartikL* gSmallFirm* gLargeFirm* dSmallShare* dLargeShare* emp_size1 emp_size5
	
	sort geography year
	compress

	merge 1:1 geography year using "$LocalData\InitialShares2000_2digit.dta"
	drop _merge
*/
	* With the county-year collapse above commented out, the file is saved at
	* the county-industry-year level.
	compress
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace

}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "



	
*******************************************************************************
****Generating "Bartik Analytic Data    ***************************************
*******************************************************************************

noi disp "Obtaining initial shares data..."
noi disp "Verifying that BartikAnalyticData_2digit.dta does not already exist..."
* -confirm file- returns error 601 when the file is missing; -capture- stores
* that code in _rc instead of stopping the do-file.
capture confirm file BartikAnalyticData_2digit.dta

* Build the dataset only when the file was not found.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building BartikAnalyticData_2digit.dta..."


	noi disp "Loading CountyBanking.dta..."
	use "$LocalData\CountyBanking.dta", clear
	noi disp "Merging CountyIndustryEmp_2digit.dta..."
	sort geography year
	* FIX: replaced the old-style -merge varlist using- with an explicit merge
	* type. CountyBanking.dta is built with one row per geography-year, while
	* the industry file is saved with several rows per geography-year, so this
	* is a 1:m merge. Stata now stops with an error if the banking side is not
	* unique, instead of pairing rows sequentially.
	merge 1:m geography year using "$LocalData\CountyIndustryEmp_2digit.dta"
	drop _merge
	sort geography year

	noi disp "Merging Demographic.dta..."
	* FIX: explicit m:1 merge. Demographic.dta is verified unique by
	* geography-year when it is built.
	merge m:1 geography year using "$LocalData\Demographic.dta"

	sort geography year
	*Checking unique geography years.
	by geography year: gen nobs = _N
	su nobs
	drop nobs

	*Initial demographic data
	keep if year>=2000
	*Restrict to observations with first observations in 2000. Lose 58 of 57,827 county-years.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	* Baseline values: the first row within each county. This is the year-2000
	* row only while the data remain ordered by year within geography.
	by geography: gen br2000 = cnty_br[1]
	by geography: gen emp2000 = c_emp[1]
	by geography: gen lnincome2000 = ln(income[1])
	by geography: gen income2000 = income[1]
	by geography: gen small_dep_share2000 = small_dep_share[1]
	by geography: gen large_dep_share2000 = large_dep_share[1]
	by geography: gen hhi_dep2000 = hhi_dep[1]

	* NOTE(review): emp_size* matches every variable with that prefix, so the
	* row total includes the all-sizes column emp_size0 if it is present -
	* confirm this denominator is intended.
	egen empsize_total = rowtotal(emp_size*)
	gen c_sizeshare1 = emp_size1/empsize_total
	* NOTE(review): this takes the THIRD row within each county, unlike the [1]
	* used for the other baseline variables - confirm which row is intended.
	by geography: gen sm_size_share2000 = c_sizeshare1[3]

	gen pop2000 = exp(ln_pop_2000)

	gen lnemp2000 = ln(emp2000)
	gen lnbr2000 = ln(br2000)

	*Keeping only counties observed in 2000

	drop if missing(emp2000) | missing(br2000)

	*Defining level variables as 1000s, where appropriate
	replace pop2000 = pop2000/1000
	replace income2000 = income2000/1000
	gen emp_2000 = emp2000/1000

	/*  NEEDED? */
	*Some state IDs got lost in the mergers
	* State code = geography with its last three digits removed
	gen state=floor(geography/1000)

	drop _merge

	*Saving Analytical Dataset
	compress
	save "$LocalData\BartikAnalyticData_2digit.dta", replace
	save "$LocalData\Archive\BartikAnalyticData_2digit`CurrentDate'.dta", replace

}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "

use "$LocalData\BartikAnalyticData_2digit.dta", clear

su gSmallFirm1 c_i_share_2000 if year>2002, detail

sort geoind year

gen LgSmallFirmnoti3 = L3.gSmallFirmnoti3
gen LgLargeFirmnoti3 = L3.gLargeFirmnoti3
gen LgSmallFirmnoti1 = L.gSmallFirmnoti1
gen LgLargeFirmnoti1 = L.gLargeFirmnoti1
gen L3small_dep_share = L3.small_dep_share

gen LgSmallFirmnoti5 = L5.gSmallFirmnoti5
gen LgLargeFirmnoti5 = L5.gLargeFirmnoti5
gen L5small_dep_share = L5.small_dep_share

gen Lbartik_noti_GS3 = L3.bartik_noti_GS3
gen Lbartik_noti_GL3 = L3.bartik_noti_GL3

gen LbartikGS3 = L3.bartikGS3


gen LgSmallFirmCounty3 = L3.gSmallFirmCounty3

egen industry_year = group(industry year)

* Spillover table: small-/large-firm employment growth regressed on lagged
* leave-out small-/large-firm growth interacted with the year-2000 small-bank
* deposit share, with geography and industry-year fixed effects.
* Fixes relative to the earlier version:
*   - every column is written to ONE file (the first column used to go to a
*     path starting with "$C:", which Stata reads as the global macro C, and
*     to a different directory than the appended columns);
*   - the column title no longer contains "$ln", which Stata expands as the
*     global macro ln (a space now follows the closing "$");
*   - columns with gLargeFirm3 as dependent variable are labelled LgEmp.
preserve
	keep if !missing(gSmallFirm3, gLargeFirm3, LgSmallFirmnoti3, LgLargeFirmnoti3) ///
		& L3.county_emp0>10000 & L3.emp_size1>200 ///
		& inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) ///
		& c_i_share_2000<0.5
	su gSmallFirm3 LgSmallFirmnoti3 LgLargeFirmnoti3 gLargeFirm3

	* NOTE(review): three of the four original calls wrote to this directory;
	* confirm it is the intended location for the table.
	local SpilloverTable "C:\LOCAL DATA\Temp Data\Walmart\Tables\RFS_BartikRegTable_Spillover"

	* The first column replaces the file, the remaining columns are appended.
	local mode replace
	foreach dep in Small Large {
		local lbl = cond("`dep'"=="Small", "SmEmp", "LgEmp")
		foreach shock in Small Large {
			reghdfe g`dep'Firm3 c.Lg`shock'Firmnoti3##c.small_dep_share2000, ///
				absorb(geography industry_year) cluster(geography)
			outreg2 using "`SpilloverTable'", tex `mode' ///
				ctitle("$\Delta$ ln(`lbl')") ///
				addtext(REG, OLS, COUNTY FE, YES, IND-YR FE, YES)
			local mode append
		}
	}

restore


* Same sample restrictions as the spillover table, limited to industry "23".
preserve
	keep if industry=="23" ///
		& !missing(gSmallFirm3, gLargeFirm3, LgSmallFirmnoti3, LgLargeFirmnoti3) ///
		& L3.county_emp0>10000 & L3.emp_size1>200 ///
		& inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) ///
		& c_i_share_2000<0.5
	summarize gSmallFirm3 LgSmallFirmnoti3 LgLargeFirmnoti3 gLargeFirm3

	reghdfe gSmallFirm3 c.LgSmallFirmnoti3##c.small_dep_share2000, ///
		absorb(geography industry_year) cluster(geography)
restore




* Industry "44-45" only: small-firm growth on lagged leave-out growth
* interacted with the year-2000 small-bank deposit share, clustered by state.
local retail_samp "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp0>10000 & L3.emp_size1>1000 & c_i_share_2000<0.5"

* (1) small-firm shock only, geography and year fixed effects
reghdfe gSmallFirm3 c.LgSmallFirmnoti3##c.small_dep_share2000 ///
	if `retail_samp' & industry=="44-45", ///
	absorb(geography year) cluster(state)

* (2) small- and large-firm shocks, geography and year fixed effects
reghdfe gSmallFirm3 c.LgSmallFirmnoti3##c.small_dep_share2000 c.LgLargeFirmnoti3##c.small_dep_share2000 ///
	if `retail_samp' & industry=="44-45", ///
	absorb(geography year) cluster(state)


* (3) small-firm shock only, geography fixed effects only
reghdfe gSmallFirm3 c.LgSmallFirmnoti3##c.small_dep_share2000 ///
	if `retail_samp' & industry=="44-45", ///
	absorb(geography) cluster(state)

* One regression per industry code: small-firm growth on lagged county
* small-firm growth interacted with the year-2000 small-bank deposit share,
* geography fixed effects, clustered by state.
* Industry "44-45" is specified differently from the rest: it omits the
* g_nat_ind_size3_1 control and requires lagged emp_size1 above 1000
* instead of 100.
local ind_samp "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp0>10000 & c_i_share_2000<0.5"

foreach naics in 11 21 22 23 31-33 42 44-45 48-49 51 53 54 55 56 61 62 71 81 {
	if "`naics'"=="44-45" {
		local natctrl
		local minsize 1000
	}
	else {
		local natctrl g_nat_ind_size3_1
		local minsize 100
	}
	reghdfe gSmallFirm3 `natctrl' c.LgSmallFirmCounty3##c.small_dep_share2000 ///
		if `ind_samp' & L3.emp_size1>`minsize' & industry=="`naics'", ///
		absorb(geography) cluster(state)
}




* Bin geographies by their year-2000 small-bank deposit share and run the
* 1-year growth regressions separately within each quintile.
sort geography year
by geography: gen geo_obs = _n
* Non-missing only on the first row of a geography, and only when that row
* is year 2000 with county_emp1 above 1000.
gen share2000 = small_dep_share2000 if geo_obs==1 & county_emp1>1000 & year==2000

* Quintile and decile of share2000, then spread to all rows of the geography.
local nbins_quintile 5
local nbins_decile 10
foreach bin in quintile decile {
	bysort year: quantiles share2000, gen(`bin') n(`nbins_`bin'')
	bysort geography: egen `bin'_share2000 = max(`bin')

}

sort geoind year

local samp1 "year>=2002 & L.county_emp1>1000 & L.emp_size1>100 & abs(gSmallFirm1)<1"

* All industries.
* NOTE(review): the regressor is the 1-year lag of the 3-year leave-out
* growth (gSmallFirmnoti3) although the outcome and the national control are
* 1-year measures; the loop further down uses gSmallFirmnoti1 -- confirm.
forvalues q = 1/5 {
	reghdfe gSmallFirm1 g_nat_ind_size1_1 L.gSmallFirmnoti3 ///
		if `samp1' & quintile_share2000==`q', ///
		absorb(geography year) cluster(state industry)
}

sort geoind year
* Industry "23" only.
forvalues q = 1/5 {
	reghdfe gSmallFirm1 L.gSmallFirmnoti1 ///
		if `samp1' & quintile_share2000==`q' & industry=="23", ///
		absorb(geography year) cluster(state)
}




* 3-year small-firm growth regressions on the urban_2000==1 sample.
local urb_samp "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp1>1000 & L3.emp_size1>100 & c_i_share_2000<0.5 & abs(gSmallFirm3)<5 & urban_2000==1"
local sm_x "c.LgSmallFirmnoti3##c.small_dep_share2000"
local lg_x "c.LgLargeFirmnoti3##c.small_dep_share2000"

* (1) small-firm shock, geography and year FE; clustered by state only
reghdfe gSmallFirm3 g_nat_ind_size3_1 `sm_x' if `urb_samp', ///
	absorb(geography year) cluster(state)

* (2) adds the large-firm shock; clustered by state and industry
reghdfe gSmallFirm3 g_nat_ind_size3_1 `sm_x' `lg_x' if `urb_samp', ///
	absorb(geography year) cluster(state industry)

* (3) small-firm shock with industry-year FE in place of the national control
reghdfe gSmallFirm3 `sm_x' if `urb_samp', ///
	absorb(geography industry_year) cluster(state industry)

* (4) both shocks with industry-year FE
reghdfe gSmallFirm3 `sm_x' `lg_x' if `urb_samp', ///
	absorb(geography industry_year) cluster(state industry)


* 5-year small-firm growth regressions (years 2002, 2007, 2012, 2017).
* The outlier trim is applied to the dependent variable gSmallFirm5. The
* earlier version trimmed on abs(gSmallFirm3), the 3-year measure, which looks
* like a copy-paste slip from the 3-year regressions (there the trim is always
* on the dependent variable).
local samp5 "year>=2002 & inlist(year, 2002, 2007, 2012, 2017) & L5.county_emp1>1000 & L5.emp_size1>100 & c_i_share_2000<0.5 & abs(gSmallFirm5)<5"
local sm5 "c.LgSmallFirmnoti5##c.small_dep_share2000"
local lg5 "c.LgLargeFirmnoti5##c.small_dep_share2000"

* (1) small-firm shock, geography and year FE
reghdfe gSmallFirm5 g_nat_ind_size5_1 `sm5' if `samp5', ///
	absorb(geography year) cluster(state industry)

* (2) small- and large-firm shocks, geography and year FE
reghdfe gSmallFirm5 g_nat_ind_size5_1 `sm5' `lg5' if `samp5', ///
	absorb(geography year) cluster(state industry)

* (3) small-firm shock, geography and industry-year FE
reghdfe gSmallFirm5 g_nat_ind_size5_1 `sm5' if `samp5', ///
	absorb(geography industry_year) cluster(state industry)

* (4) small- and large-firm shocks, geography and industry-year FE
reghdfe gSmallFirm5 g_nat_ind_size5_1 `sm5' `lg5' if `samp5', ///
	absorb(geography industry_year) cluster(state industry)



* Urban-sample regressions that bring in the county-industry's own lagged
* 3-year growth.
* The own lag is generated BEFORE it is used: the earlier version interacted
* a variable named LgSmallFirm3, which is not created anywhere in this part of
* the file, while the matching lag (L3gSmallFirm3) was only generated after
* the regression.
gen L3gSmallFirm3 = L3.gSmallFirm3

local own_samp "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp1>1000 & L3.emp_size1>100 & c_i_share_2000<0.5 & abs(gSmallFirm3)<5 & urban_2000==1"

* Leave-out shock with lagged national growth and lagged own growth as controls.
reghdfe gSmallFirm3 g_nat_ind_size3_1 L3.g_nat_ind_size3_1 L3.gSmallFirm3 LgSmallFirmnoti3 c.LgSmallFirmnoti3##c.small_dep_share2000 ///
	if `own_samp', ///
	absorb(geography year) cluster(state industry)

* Lagged own growth interacted with the year-2000 small-bank deposit share.
reghdfe gSmallFirm3 g_nat_ind_size3_1 c.L3gSmallFirm3##c.small_dep_share2000 ///
	if `own_samp', ///
	absorb(geography year) cluster(state industry)



* Geography-by-year group identifier.
egen geoyear = group(geography year)

* 3-year regressions on geographies whose year-2000 small-bank deposit share
* lies within 0.4 of 0.5, with lagged county_emp1 above 10000.
local mid_samp "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp1>10000 & L3.emp_size1>100 & c_i_share_2000<0.5 & abs(gSmallFirm3)<5 & abs(small_dep_share2000-0.5)<0.4"

* (1) small- and large-firm leave-out shocks.
* The earlier version listed the small-firm interaction twice, so the second
* term added nothing; every other two-term specification in this file pairs
* the small-firm shock with the large-firm shock, so the second term is now
* LgLargeFirmnoti3.
* NOTE(review): confirm this was the intended specification.
reghdfe gSmallFirm3 c.LgSmallFirmnoti3##c.small_dep_share2000 c.LgLargeFirmnoti3##c.small_dep_share2000 ///
	if `mid_samp', ///
	absorb(geography year) cluster(state industry)

* (2) small-firm shock with the national industry-size growth control
reghdfe gSmallFirm3 g_nat_ind_size3_1 c.LgSmallFirmnoti3##c.small_dep_share2000 ///
	if `mid_samp', ///
	absorb(geography year) cluster(state industry)

* (3) same specification as (2); it was repeated verbatim in the earlier version
reghdfe gSmallFirm3 g_nat_ind_size3_1 c.LgSmallFirmnoti3##c.small_dep_share2000 ///
	if `mid_samp', ///
	absorb(geography year) cluster(state industry)

* (4) lagged county small-firm growth in place of the leave-out shock
reghdfe gSmallFirm3 g_nat_ind_size3_1 c.LgSmallFirmCounty3##c.small_dep_share2000 ///
	if `mid_samp', ///
	absorb(geography year) cluster(state industry)

* (5) bartik_noti_GS3 interacted with the deposit share
* NOTE(review): this uses the unlagged bartik_noti_GS3 although a lagged
* version (Lbartik_noti_GS3) is generated earlier -- confirm which is intended.
reghdfe gSmallFirm3 c.bartik_noti_GS3##c.small_dep_share2000 ///
	if `mid_samp', ///
	absorb(geography year) cluster(state industry)

* IV specification: lagged county small-firm growth and its interaction with
* the year-2000 small-bank deposit share, instrumented by the lagged Bartik
* measure and its interaction with the same share.
generate interact = small_dep_share2000*LgSmallFirmCounty3
generate ivinteract = small_dep_share2000*LbartikGS3

local iv_base "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & L3.county_emp1>10000 & L3.emp_size1>100 & c_i_share_2000<0.5"

ivreghdfe gSmallFirm3 g_nat_ind_size3_1 (LgSmallFirmCounty3 interact = LbartikGS3 ivinteract) ///
	if `iv_base' & abs(gSmallFirm3)<5 & abs(small_dep_share2000-0.5)<0.5, ///
	absorb(geography year) cluster(state industry)


* Scatter of the Bartik measure against county small-firm growth.
twoway scatter bartikGS3 gSmallFirmCounty3 ///
	if `iv_base' & abs(gSmallFirmCounty3)<0.5



* Reduced form: outcome regressed directly on the lagged Bartik measure.
reghdfe gSmallFirm3 g_nat_ind_size3_1 c.LbartikGS3##c.small_dep_share2000 ///
	if `iv_base' & abs(gSmallFirm3)<5, ///
	absorb(geography year) cluster(state industry)



* 3-year regressions with the sample defined on lagged leave-out county
* employment (county_noti_emp*), clustered by state and year.
local core3 "year>=2002 & inlist(year, 2002, 2005, 2008, 2011, 2014, 2017) & c_i_share_2000<0.5"
local noti1 "`core3' & L3.county_noti_emp1>10000 & L3.emp_size1>100"
local sm_x "c.LgSmallFirmnoti3##c.small_dep_share2000"
local lg_x "c.LgLargeFirmnoti3##c.small_dep_share2000"

* (1) small-firm growth on the large-firm shock; sample uses the size-5 thresholds
reghdfe gSmallFirm3 g_nat_ind_size3_1 `lg_x' ///
	if `core3' & L3.county_noti_emp5>10000 & L3.emp_size5>100 & abs(gSmallFirm3)<5, ///
	absorb(geography year) cluster(state year)

* (2) large-firm growth on the small-firm shock; trim on the large-firm outcome
reghdfe gLargeFirm3 g_nat_ind_size3_5 `sm_x' ///
	if `noti1' & abs(gLargeFirm3)<5, ///
	absorb(geography year) cluster(state year)

* (3) small-firm growth on both shocks, geography and year FE
reghdfe gSmallFirm3 g_nat_ind_size3_1 `sm_x' `lg_x' ///
	if `noti1' & abs(gSmallFirm3)<5, ///
	absorb(geography year) cluster(state year)

* (4) small-firm shock, geography and industry-year FE
reghdfe gSmallFirm3 `sm_x' ///
	if `noti1' & abs(gSmallFirm3)<5, ///
	absorb(geography industry_year) cluster(state year)

* (5) both shocks, geography and industry-year FE
reghdfe gSmallFirm3 `sm_x' `lg_x' ///
	if `noti1' & abs(gSmallFirm3)<5, ///
	absorb(geography industry_year) cluster(state year)

* (6) small-firm shock with the national control, geography and year FE
reghdfe gSmallFirm3 g_nat_ind_size3_1 `sm_x' ///
	if `noti1' & abs(gSmallFirm3)<5, ///
	absorb(geography year) cluster(state year)


}